{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import statsmodels.api as sm\n",
      "import statsmodels.formula.api as smf\n",
      "import pandas as pd\n",
      "import seaborn as sb\n",
      "import matplotlib.pyplot as plt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dat = pd.read_csv('data/vc_1.json_remake',index_col=0,parse_dates=True)\n",
      "dat.points.interpolate(inplace=True)\n",
      "# 24h * 60min / (15min) * 7 days\n",
      "decomfreq = 24 * 60"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 127
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dat = pd.read_csv('data/vc_3.json_remake',index_col=0,parse_dates=True)\n",
      "dat.points.interpolate(inplace=True)\n",
      "decomfreq = 24 * 60\n",
      "%matplotlib\n",
      "seasonal_decomp(data=dat.points,freq=decomfreq)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Using matplotlib backend: Qt4Agg\n"
       ]
      },
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 139,
       "text": [
        "Int64Index([ 1225,  1227,  1229,  1230,  1233,  9920,  9923,  9941,  9947,\n",
        "             9953,\n",
        "            ...\n",
        "            30184, 30188, 30203, 30253, 30772, 30775, 30850, 30911, 34568,\n",
        "            37453],\n",
        "           dtype='int64', length=125)"
       ]
      }
     ],
     "prompt_number": 139
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Using matplotlib backend: Qt4Agg\n"
       ]
      },
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 136,
       "text": [
        "Int64Index([ 1225,  1227,  1229,  1230,  1233,  9920,  9923,  9941,  9947,\n",
        "             9953,\n",
        "            ...\n",
        "            30184, 30188, 30203, 30253, 30772, 30775, 30850, 30911, 34568,\n",
        "            37453],\n",
        "           dtype='int64', length=125)"
       ]
      }
     ],
     "prompt_number": 136
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def seasonal_decomp(data=None, freq=None,model='addtitive',mode='median'):\n",
      "    dat = data\n",
      "    decomfreq = freq\n",
      "    res = sm.tsa.seasonal_decompose(dat.interpolate().tolist(),freq=decomfreq, model=model)\n",
      "#     res.plot()\n",
      "    random = pd.Series(res.resid)\n",
      "    if(mode=='average'):\n",
      "        mean_nan = np.nanmean(random)\n",
      "        std_nan = np.nanstd(random)\n",
      "    elif(mode=='median'):\n",
      "        rolling_median = random.rolling(3).median()\n",
      "        mean_nan = np.nanmean(rolling_median)\n",
      "        std_nan = np.nanstd(rolling_median)\n",
      "    min_val = mean_nan - 4*std_nan\n",
      "    # max_val = mean(random, na.rm = T) + 4*sd(random, na.rm = T)\n",
      "    max_val = mean_nan + 4*std_nan\n",
      "    position = pd.Series(res.resid.tolist(),index=np.arange(res.resid.shape[0]))\n",
      "    anomaly = position[(position>max_val) | (position<min_val)]\n",
      "    # anomalyL = position[(position<min_val)]\n",
      "    # anomaly = anomalyH.append(anomalyL).drop_duplicates()\n",
      "    point_anomaly_idx = anomaly.index\n",
      "    points_anomaly = dat[point_anomaly_idx]\n",
      "    ax = plt.subplot()\n",
      "    # dat.points.plot(ax=ax)\n",
      "    ax.plot(dat, color='red')\n",
      "    ax.plot(points_anomaly,'o')\n",
      "    number_of_anomalies = point_anomaly_idx.shape[0]\n",
      "    ax.set_title(\"Anomaly Detection: %s\"%number_of_anomalies)\n",
      "    return point_anomaly_idx"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 135
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# res = sm.tsa.seasonal_decompose(dat.points.interpolate().tolist(),freq=decomfreq, model='addtitive')\n",
      "# #     res.plot()\n",
      "\n",
      "# # random = pd.Series(res.resid)\n",
      "# # rolling_median = random.rolling(3).median()\n",
      "# # mean_nan = np.nanmean(rolling_median)\n",
      "# # std_nan = np.nanstd(rolling_median)\n",
      "# # min_val = mean(random, na.rm = T) - 4*sd(random, na.rm = T)\n",
      "# random = res.resid\n",
      "# mean_nan = np.nanmean(random)\n",
      "# std_nan = np.nanstd(random)\n",
      "# min_val = mean_nan - 4*std_nan\n",
      "# # max_val = mean(random, na.rm = T) + 4*sd(random, na.rm = T)\n",
      "# max_val = mean_nan + 4*std_nan\n",
      "# # %matplotlib\n",
      "# # ax = plt.subplot()\n",
      "# # ax.plot(res.resid)\n",
      "# # plt.axhline(y=min_val)\n",
      "# # plt.axhline(y=max_val)\n",
      "# position = pd.Series(res.resid.tolist(),index=np.arange(res.resid.shape[0]))\n",
      "# anomaly = position[(position>max_val) | (position<min_val)]\n",
      "# # anomalyL = position[(position<min_val)]\n",
      "# # anomaly = anomalyH.append(anomalyL).drop_duplicates()\n",
      "# point_anomaly_idx = anomaly.index\n",
      "\n",
      "# points_anomaly = dat.points[point_anomaly_idx]\n",
      "# %matplotlib\n",
      "# ax = plt.subplot()\n",
      "# number_of_anomalies = point_anomaly_idx.shape[0]\n",
      "# ax.set_title(\"Anomaly Detection: %s\"%number_of_anomalies)\n",
      "# # dat.points.plot(ax=ax)\n",
      "# ax.plot(dat.points, color='red',linewidth=0.5)\n",
      "# ax.plot(points_anomaly,'o')\n",
      "# ax.grid(False)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 130
    }
   ],
   "metadata": {}
  }
 ]
}